import requests
import re
from lxml import etree
from myparse.chaojidddd import parse_code

# session = requests.Session()
# print(session.cookies)
# res = session.get("https://quotes.toscrape.com/login")
# token = re.search(r'<input type="hidden" name="csrf_token" value="(.*?)"/>', res.text).group(1)
# data = {
# "csrf_token": token,
# "username": 1,
# "password": 2
# }
# session.post("https://quotes.toscrape.com/login", data=data)
# res = session.get("https://quotes.toscrape.com/page/1/",)
# print(res.text)

# One Session is reused for every request so that cookies set by the site
# persist between the listing requests and the captcha requests.
session = requests.Session()

# Desktop Chrome user-agent, passed explicitly as `headers=` on each request.
# The string is split across lines purely for readability; the pieces are
# joined by implicit literal concatenation.
headers = {
	"user-agent": (
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
		"AppleWebKit/537.36 (KHTML, like Gecko) "
		"Chrome/130.0.0.0 Safari/537.36"
	),
}

# Crawl the paginated listing on bjcourt.gov.cn and print the cell texts of
# every data row. When a page yields no data rows the script assumes the site
# is asking for a captcha: it downloads the captcha image, has parse_code()
# read it, submits the answer, and then fetches the same page again.

LAST_PAGE = 578  # last page to crawl (the original bound was `page < 579`)
REQUEST_TIMEOUT = 15  # seconds; without a timeout a stalled connection hangs forever

page = 1
while page <= LAST_PAGE:
	print(f"正在爬取第{page}页")
	url = f"https://www.bjcourt.gov.cn/zxxx/indexOld.htm?st=1&zxxxlx=100013007&bzxrlx=&bzxrxm=&zrr=&frhqtzz=&jbfyId=&ah=&dqxh=22&page={page}"
	res = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
	tree = etree.HTML(res.text)
	# position()>1 skips the first <tr> of the result table.
	items = tree.xpath('//table[@class="table_list_02"]//tr[position()>1]')
	if items:
		for item in items:
			print(item.xpath('./td/text()'))
		page += 1
		continue

	# No data rows: treated as a captcha challenge. `page` is deliberately not
	# advanced, so the same page is requested again after the captcha step.
	print("发现验证码")
	count = 0  # number of rejected captcha answers so far
	while True:
		# `n` only varies the URL between attempts (0, 1, 2, ...).
		image_res = session.get(
			f"https://www.bjcourt.gov.cn/yzm.jpg?n={count}",
			headers=headers,
			timeout=REQUEST_TIMEOUT,
		)
		with open("verify.png", "wb") as f:
			f.write(image_res.content)

		code = parse_code("verify.png")

		data = {"yzm": code}
		# requests follows redirects by default, in which case status_code is
		# that of the final response and can never be 302. Redirects are
		# disabled here so the 302 check below (which this script uses as the
		# "captcha rejected" signal) can actually fire.
		res = session.post(
			"https://www.bjcourt.gov.cn/cpws/checkkaptcha.htm",
			data=data,
			headers=headers,
			allow_redirects=False,
			timeout=REQUEST_TIMEOUT,
		)
		if res.status_code != 302:
			# Attempts are reported 1-based: failures so far plus this one.
			print(f"共{count + 1}次识别成功")
			break
		count += 1
		print(f"第{count}次识别失败")




